%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% beamer %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% To run - xelatex filename.tex   (or lualatex; the preamble loads fontspec, which pdflatex does not support)
%      acroread filename.pdf
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Beamer class in compressed-navigation mode.
% NOTE(review): the misspelled global option `oilve` was dropped; nothing in
% this file declares such an option. Confirm that no `olive` option was meant.
\documentclass[compress]{beamer}
% Everything that follows applies to presentation mode.
\mode<presentation>
% Font size used at the start of top-level and nested itemize/enumerate bodies.
\setbeamertemplate{itemize/enumerate body begin}{\normalsize}
\setbeamertemplate{itemize/enumerate subbody begin}{\small}

% Overall look: presentation theme, colour theme and font theme.
% The lists in the comments below are alternatives that can be swapped in.
\usetheme[]{CambridgeUS}
% other themes: AnnArbor, Antibes, Bergen, Berkeley, Berlin, Boadilla, boxes, CambridgeUS, Copenhagen, Darmstadt, default, Dresden, Frankfurt, Goettingen,
% Hannover, Ilmenau, JuanLesPins, Luebeck, Madrid, Malmoe, Marburg, Montpellier, PaloAlto, Pittsburgh, Rochester, Singapore, Szeged, classic

\usecolortheme{beaver}
% color themes: albatross, beaver, beetle, crane, default, dolphin,  fly, lily, orchid, rose, seagull, seahorse, sidebartab, whale, wolverine

\usefonttheme{professionalfonts}
% font themes: default, professionalfonts, serif, structurebold, structureitalicserif, structuresmallcapsserif


% NOTE(review): \hypersetup is used here before the explicit \usepackage{hyperref}
% further down; this presumably relies on beamer having loaded hyperref already.
\hypersetup{pdfpagemode=FullScreen} % makes your presentation go automatically to full screen

% Custom colour palette (xcolor syntax: \definecolor{name}{model}{values}).
% NOTE(review): `magenta`, `orange` and `olive` are also standard xcolor names,
% so these definitions presumably replace the built-in shades.

% -- pure primaries
\definecolor{Red}{rgb}{1,0,0}
\definecolor{Green}{rgb}{0,1,0}
\definecolor{Blue}{rgb}{0,0,1}

% -- reds, pinks and oranges
\definecolor{darkred}{rgb}{0.75,0,0}
\definecolor{magenta}{rgb}{1,0,0.6}
\definecolor{hotpink}{rgb}{1,0,0.5}
\definecolor{newcolor3}{rgb}{1,0,0.35}
\definecolor{orange}{rgb}{1,0.4,0}
\definecolor{gold}{rgb}{0.6,0.5,0}

% -- blues and purples
\definecolor{lightblue}{rgb}{0,0.5,1}
\definecolor{newcolor}{rgb}{0,0.3,1}
\definecolor{skyblue}{HTML}{75bbfd}
\definecolor{lightpurple}{rgb}{0.8,0.6,0.9}
\definecolor{newcolor2}{rgb}{0.5,0.3,0.5}
\definecolor{purpleish}{cmyk}{0.75,0.75,0,0}

% -- greens
\definecolor{darkgreen1}{rgb}{0,0.35,0}
\definecolor{darkgreen}{rgb}{0,0.6,0}
\definecolor{olive}{cmyk}{0.64,0,0.95,0.4}

% can also choose different themes for the "inside" and "outside"

% \usepackage{beamerinnertheme_______}
% inner themes include circles, default, inmargin, rectangles, rounded

% \usepackage{beamerouterthemesmoothbars}
% outer themes include default, infolines, miniframes, shadow, sidebar, smoothbars, smoothtree, split, tree


% Outer theme: smoothbars, with the options subsection=true and height=40pt.
% NOTE: the headline and footline templates are both redefined further down in
% this preamble, so those later definitions are the ones that take effect.
\useoutertheme[subsection=true, height=40pt]{smoothbars}

% to have the same footer on all slides
%\setbeamertemplate{footline}[text line]{STUFF HERE!}
\setbeamertemplate{footline}[text line]{} % makes the footer EMPTY
% include packages
%

%show the page numbers in footnote
%\addtobeamertemplate{navigation symbols}{}{%
	%	\usebeamerfont{footline}%
	%	\usebeamercolor[fg]{footline}%
	%	\hspace{1em}%
	%	\insertframenumber/\inserttotalframenumber
	%}

% Footline text: purpleish foreground, bold series.
\setbeamercolor{footline}{fg=purpleish}
\setbeamerfont{footline}{series=\bfseries}

% Highlight the current section in the navigation bar:
%  - "section in head/foot": section name in white inside a dark-red TikZ node
%    with rounded corners (needs tikz, which is loaded later in the preamble);
%  - "section in head/foot shaded": section name as plain dark-red text.
\setbeamertemplate{section in head/foot}{\hfill\tikz\node[rectangle, fill=darkred, rounded corners=1pt,inner sep=1pt,] {\textcolor{white}{\insertsectionhead}};}
\setbeamertemplate{section in head/foot shaded}{\textcolor{darkred}{\hfill\insertsectionhead}}

% Headline: one full-width bar in the "palette quaternary" colours that holds
% only the horizontal section navigation (no subsection bullets). The bar's
% height/depth (ht/dp) control the headline size.
% An earlier, unsized \setbeamertemplate{headline} definition was dropped: it
% was immediately overwritten by this one and never took effect.
\setbeamertemplate{headline}{%
	\leavevmode%
	\hbox{%
		\begin{beamercolorbox}[wd=\paperwidth,ht=3.5ex,dp=1.125ex]{palette quaternary}%
			\insertsectionnavigationhorizontal{\paperwidth}{}{\hskip0pt plus1filll}
		\end{beamercolorbox}%
	}% <- comment sign keeps the line end from adding a stray space after the box
}

% Footline: two boxes, each half the paper width.
%  - left  ("author in head/foot"): short author followed by short institute;
%  - right ("title in head/foot"):  short title, then "frame / total frames"
%    pushed to the right edge by \hfill.
% This definition replaces the empty "text line" footline set earlier.
% The line-end % signs are significant: they keep stray spaces out of the boxes.
\setbeamertemplate{footline}{%
	\leavevmode%
	\hbox{\begin{beamercolorbox}[wd=.5\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm plus1fill,rightskip=.3cm]{author in head/foot}%
			\usebeamerfont{author in head/foot}\insertshortauthor ~ \insertshortinstitute
		\end{beamercolorbox}%
		\begin{beamercolorbox}[wd=.5\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm,rightskip=.3cm plus1fil]{title in head/foot}%
			\usebeamerfont{title in head/foot}\insertshorttitle\hfill\insertframenumber\,/\,\inserttotalframenumber
	\end{beamercolorbox}}%
	\vskip0pt%
}


%\setbeamertemplate{navigation symbols}{}

% Title-page metadata.
% The title now matches the deck's content: every slide in this file is about
% principal component analysis (the previous title was "Decision Tree").
\title{Principal Component Analysis}
\author{ML Instruction Team, Fall 2022}
% Empty short institute; the long form is broken over lines with \newline.
\institute[]{CE Department \newline  Sharif University of Technology \newline \newline}
% Short date is \today; the long date shown on the title page is empty.
\date[\today]{}
%\titlegraphic{\includegraphics[scale=.35]{example-image}}



%Write \usepackage{etex} just after the \documentclass line (it should be the first loaded package).
% NOTE(review): here etex is loaded after the theme setup, not first; confirm
% whether it is still needed at all.
\usepackage{etex}
\usepackage{subcaption}
\usepackage{multicol}
\usepackage{amsmath}
\usepackage{epsfig}
\usepackage{graphicx}
\usepackage[all,knot]{xy}
\xyoption{arc}
\usepackage{url}
\usepackage{multimedia}
\usepackage{hyperref}
% Link colours. citecolor previously named `redorange`, which is not defined
% anywhere in this file; it now uses the `orange` defined in the palette above.
\hypersetup{colorlinks,linkcolor=blue,citecolor=orange,urlcolor=darkred}
\usepackage{multirow}
\usepackage[font={scriptsize}]{caption}
\usepackage{pgf}
% fontspec requires XeLaTeX or LuaLaTeX.
\usepackage{fontspec}
%\setsansfont[Scale=MatchLowercase, BoldFont = * Bold, ItalicFont = * Italic]{Caladea}

%\usepackage{enumitem,xcolor}
%\newcommand{\labelitemi}{$\blacksquare$}
%\newcommand{\labelitemii}{$\diamond$}
%\newcommand{\labelitemiii}{$\square$}
%\newcommand{\labelitemiv}{$\ast$}
%\setbeamercolor*{item}{fg=red}


% Bullet symbols for the three itemize nesting levels.
% (The professionalfonts font theme is already selected near the top of the
% preamble, so it is not repeated here.)
\setbeamertemplate{itemize item}{\color{skyblue}$\blacksquare$}
\setbeamertemplate{itemize subitem}{\color{hotpink}$\triangleright$}
\setbeamertemplate{itemize subsubitem}{\color{orange}$\bullet$}


\usepackage{anyfontsize}
% t1enc (legacy T1 font-encoding switch) is intentionally not loaded: this
% preamble uses fontspec, which must not be combined with 8-bit font-encoding
% packages.
\usepackage{tikz}
\usetikzlibrary{calc,trees,positioning,arrows,chains,shapes.geometric,decorations.pathreplacing,decorations.pathmorphing,shapes,matrix,shapes.symbols}


\usepackage{xcolor}
% \tc{<colour>}{<text>} -- typeset <text> in <colour>.
% The macro body begins and ends with a space token, and the text is pulled in
% by 2pt on each side. Call sites are written without surrounding blanks
% (e.g. `a\tc{keywords}{unit vector}$v$`) and rely on those spaces for word
% spacing, so do not strip them.
\newcommand{\tc}[2]{ \textcolor{#1}{\hspace{-2pt}#2\hspace{-2pt}} }

% Extra theorem-like environments; the optional [theorem] argument makes them
% share the numbering of the `theorem` environment.
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{assumption}[theorem]{Assumption}

%\usepackage{fontspec,unicode-math}
%\setmainfont[Scale=0.9]{Nimbus Roman No9 L}
%\setmonofont[Scale=0.9]{Monaco}
% Sans-serif family set through fontspec; the named font must be installed.
\setsansfont[Scale=1]{Times New Roman}

% \vect{<symbol>}: bold math symbol for vectors.
\newcommand{\vect}[1]{\boldsymbol{#1}}

% Highlight colours; `keywords` is the colour passed to \tc on the slides.
\definecolor{strings}{rgb}{.624,.251,.259}
\definecolor{keywords}{rgb}{.893,.188,.01}
\definecolor{comment}{rgb}{.322,.451,.322}

%\usepackage{smartdiagram}
%\usesmartdiagramlibrary{additions}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Title Page Info %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Begin Your Document %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}
	
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\fontsize{9}{9}
\begin{frame}[noframenumbering, plain]
	\titlepage
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Intuition}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame{\frametitle{Motivation}
	\begin{itemize}
		\item PCA identifies one or more orthogonal directions that capture the greatest amount of variance in a feature matrix $X\in\mathbb{R}^{m\times n}$.
		
		\medskip
		\item
		Assuming zero-mean feature matrix $X\in\mathbb{R}^{m\times n}$, the variance of the samples' projections onto a\tc{keywords}{unit vector}$v$ is given by:
		\begin{equation*}
			\text{Var}(Xv)=\mathbb{E}[(Xv-\mathbb{E}(Xv))^2]=\frac{1}{m} \sum_{i=1}^m(x_i^{t} v)^2=\frac{1}{m}\|X v\|^2=\frac{1}{m} v^{t} X^{t} X v
		\end{equation*}
	
		\item In light of this consideration, we define the first desired vector $v_1$ as the solution to the constrained optimization problem:
		\begin{equation*}
			\max\limits_{\|v\|_2=1} v^{t} X^{t} X v
		\end{equation*}
	
	\item We convert this constrained optimization problem into an unconstrained one by writing down its Lagrangian:
	\begin{equation*}
		\mathcal{L}(v):=v^{t} X^{t} X v-\lambda(v^{t} v-1)
	\end{equation*}
	\end{itemize}	
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame{\frametitle{First PC}
	\begin{itemize}
		\item First-order necessary conditions for optimal value imply that:
		\begin{equation*}
			0=\nabla \mathcal{L}(v_1)=2 X^{t} X v_1-2 \lambda v_1
		\end{equation*}
	
		\medskip
		\item Since $X^{t} X v_1=\lambda v_1$, $v_1$ is an\tc{keywords}{eigenvector}of $X^{t} X$ with eigenvalue $\lambda$. 
		
		\medskip
		\item Since we constrain $\|v_1\|_2^2=v_1^{t} v_1=1$, the value of the objective is precisely:
		
		\begin{equation*}
			v_1^{t} X^{t} X v_1=v_1^{t}(\lambda v_1)=\lambda v_1^{t} v_1=\lambda
		\end{equation*}
	
		\medskip
		\item The\tc{keywords}{optimal value}is $\lambda=\lambda_{\max}(X^{t} X)$, which is achieved when $v_1$ is a\tc{keywords}{unit eigenvector}of $X^{t} X$ corresponding to its\tc{keywords}{largest}eigenvalue.
		
	\end{itemize}

}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{PC Evaluation}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame{\frametitle{More PCs?}
\begin{itemize}
	\item How can we find more directions with the desired property?
	
	\begin{itemize}
		\item Ideally, the subsequent directions found should also be directions of high variance.
		\item They should be orthogonal to the existing ones in order to minimize redundancy. 
	\end{itemize}

	\medskip
	\item We define the $k$-th loading vector $v_k$ as the solution to the constrained optimization problem:
	\begin{equation*}
		\max\limits_{v} v^{t} X^{t} X v \hspace{8pt} \mbox{subject to} \hspace{8pt} v^{t} v=1,
		v^{t} v_i=0, \quad i=1, \ldots, k-1
	\end{equation*}
	
	
	\item \textbf{Claim}: $v_k$ is a\tc{keywords}{unit eigenvector}of $X^{t} X$ corresponding to its $k$-th\tc{keywords}{largest}eigenvalue.
	
	\medskip
	\item The unit vector that defines the $k$-th axis is called the $k$-th principal component (PC).
\end{itemize}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame{\frametitle{Evaluation of PCs}
	\begin{itemize}
		\item Assume the singular value decomposition of the centered feature matrix $X$ is as follows:
			\begin{equation*}
				X=U \Sigma V^T=
				\begin{bmatrix} u_1 & u_2 & \cdots & u_r \end{bmatrix}
				\begin{bmatrix}
					\sigma_1 & & & 0 \\
					& \sigma_2 & & \\
					& & \ddots & \\
					0 & & & \sigma_r
				\end{bmatrix}
				\begin{bmatrix}
					v_1^T \\
					v_2^T \\
					\vdots \\
					v_r^T
				\end{bmatrix}
			\end{equation*}

			The first $k$ PCs are $W_k = [v_1, v_2, \cdots, v_k]$.
			
		\medskip
		\item \tc{keywords}{Explained Variance Ratio}explains the proportion of the dataset’s variance that lies along the axis of each PC.
		
		\medskip
		\item PCA can also be viewed as the projection of the sample points to the subspace with the minimum perpendicular distance.
	\end{itemize}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame{\frametitle{Other Derivation?}
		\vspace{-1cm}
	\begin{definition}
		For a matrix $X$, the operator 2-norm is defined as
		\begin{equation*}
			\|X\|_2=\sup_{v \neq 0} \frac{\|X v\|_2}{\|v\|_2}=\max_i \sigma_i
		\end{equation*}
		and the Frobenius norm as
		\begin{equation*}
			\|X\|_F=\sqrt{\sum_{i j} X_{i j}^2}=\sqrt{\operatorname{tr}(X^t X)}=\sqrt{\sum_i \sigma_i^2}
		\end{equation*}
		where $\sigma_i$ are the singular values of $X$, i.e.\ the diagonal elements of $\Sigma$ in the singular value decomposition $X=U \Sigma V^t$.
	\end{definition}
	
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame{\frametitle{Other Derivation?}

	\begin{itemize}
		\item PCA is given by the same singular value decomposition when the data are centered. 
		
		\medskip
		\item The columns of $U \Sigma$ are the principal component scores (the data projected onto the principal axes), and the columns of $V$ are the principal axes, i.e.\ eigenvectors of the covariance matrix.
		
		\medskip
		\item
		The reconstruction of $X$ with only the $k$ principal components corresponding to the $k$ largest singular values is given by $X_k=U_k \Sigma_k V_k^{\top}$.
		
		\medskip
		\item
		The\tc{keywords}{Eckart--Young}theorem says that $X_k$ is the matrix minimizing the norm of the reconstruction error $\|X-A\|$ among all matrices $A$ of rank at most $k$.
		
		\medskip
		\item This is true for both the Frobenius norm and the operator 2-norm.
	\end{itemize}


}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame{\frametitle{Final Notes}
	\centering
	\vspace{50pt}
	\textbf{Thank You!}

	\vspace{50pt}
	\textbf{Any Questions?}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{document}